Clustering ASVs

Code
library(dplyr) ; library(tidyr) ; library(ggplot2)
# library(doParallel) ; library(foreach) ; library(doSNOW)

root <- rprojroot::has_file(".git/index")
datadir = root$find_file("data")
funsdir = root$find_file("functions")
savingdir = root$find_file("saved_files")
savingdirOceanSlices = root$find_file("saved_files_oceanSlices")
savingdirBinary = root$find_file("saved_files_binary") 

datapath = root$find_file(paste0(datadir,'/','grump.phaeocystis_asv_long.csv'))
files_vec <- list.files(funsdir)

for( i in 1:length(files_vec)){
  source(root$find_file(paste0(funsdir,'/',files_vec[i])))
}

dframe = data.table::fread(input = datapath) %>% filter(Cruise!="MOSAiC",Raw.Sequence.Counts>0)
dframe_allASVs = tidy_grump(Dframe = dframe,binnary = T)

useStoredFiles = T

The idea is to create a ‘custom’ distance matrix, to induce the grouppings.

  • \(c_1\) is within the same species - but no unassigned
  • \(c_2\) is between assigned species
  • \(c_3\) is between species and unassigned
  • \(c_4\) is within unassigned and unassigned

For this document we will use only the following configuration:

\(c_1=1\), \(c_2=1000\), \(c_3=10\) , \(c_4=10\)

A priori, the ‘probability’ of unassigned ASVs to agglomerate between themselves is the same as if it was to agglomerate with other species. Large \(c_2\) makes it harder for known species ASVs to group between themselves.

Code
inducedDist_ = inducedDist(
  dFrame = dframe_allASVs$dframe,
  c1 = 1,c2=1000,c3=10,c4=10,
  compMatrix = dframe_allASVs$ASV_composition %>% data.frame())

If we exclude the ASVs that appears only one ~or two~ times, does it help?

Code
nASVs <- dframe_allASVs$dframe %>% select(ASV_name) %>% distinct() %>% nrow()

plot0s_summ = dframe_allASVs$dframe %>% group_by(ASV_name,SampleKey) %>% 
  summarise(Freq=n()) %>%
  mutate(nSamplesObserved = sum(Freq)) %>% 
  arrange(nSamplesObserved) %>% 
  select(ASV_name,nSamplesObserved) %>% distinct() %>%
  mutate(FreqNSamples=cut(nSamplesObserved,breaks = c(0,1,2,3,5,10,25,50,100,250,1500),right = T,include.lowest = T)) %>% 
  group_by(FreqNSamples) %>% 
  summarise(Freq=n(),Pct=n()/nASVs) %>% 
  mutate(CummPct = cumsum(Pct))

### Creating vectors with id of ASVs to remove:
ASVs2Remove_df = dframe_allASVs$dframe %>% group_by(ASV_name,SampleKey) %>% 
  summarise(Freq=n()) %>%
  mutate(nSamplesObserved = sum(Freq)) %>% 
  arrange(nSamplesObserved) %>% 
  select(ASV_name,nSamplesObserved) %>% distinct() 

vetASVs_2orMore = ASVs2Remove_df %>% filter(nSamplesObserved<2) %>% select(ASV_name) %>% pull()
vetASVs_3orMore = ASVs2Remove_df %>% filter(nSamplesObserved<3) %>% select(ASV_name) %>% pull()

saveRDS(vetASVs_2orMore,file = paste0(savingdirOceanSlices,'/','asvs_to_remove_oneSample'))
saveRDS(vetASVs_3orMore,file = paste0(savingdirOceanSlices,'/','asvs_to_remove_twoSample'))


nsamples = dframe_allASVs$dframe %>% select(SampleID) %>% distinct() %>% nrow()
plot0s_summ2 = dframe_allASVs$dframe %>% 
  group_by(SampleKey) %>% 
  summarise(NumbersOfAsvInEachSample=n()) %>% 
  arrange(NumbersOfAsvInEachSample) %>% 
  mutate(FreqAsvs=cut(NumbersOfAsvInEachSample,breaks = c(0,1,2,3,5,10,25,50,100,250,500),
                      right = T,include.lowest = T)) %>% 
  group_by(FreqAsvs) %>% 
  summarise(Freq=n(),Pct=n()/nsamples) %>% 
  mutate(CummPct = cumsum(Pct))

plot0s_summ %>% knitr::kable(digits = 3) %>% kableExtra::kable_styling(full_width = FALSE, position = "left")
FreqNSamples Freq Pct CummPct
[0,1] 715 0.482 0.482
(1,2] 169 0.114 0.596
(2,3] 80 0.054 0.650
(3,5] 103 0.070 0.720
(5,10] 121 0.082 0.802
(10,25] 129 0.087 0.889
(25,50] 67 0.045 0.934
(50,100] 48 0.032 0.966
(100,250] 41 0.028 0.994
(250,1.5e+03] 9 0.006 1.000
Code
plot0s_summ2 %>% knitr::kable(digits = 3) %>% kableExtra::kable_styling(full_width = FALSE, position = "float_right")
FreqAsvs Freq Pct CummPct
[0,1] 90 0.091 0.091
(1,2] 58 0.059 0.150
(2,3] 36 0.036 0.186
(3,5] 62 0.063 0.249
(5,10] 161 0.163 0.412
(10,25] 243 0.246 0.659
(25,50] 253 0.256 0.915
(50,100] 82 0.083 0.998
(100,250] 2 0.002 1.000

Lets now compare the distance matrix using mds, ltns, and umap.

I’m suggesting three different weight metrics:

  1. We create the sample compositions, and add the RA of each ASV across all samples. I’m calling this as Sum_RA

  2. Count in how many samples each ASV appeared. Denoting this metric as Sum_Apearance

  3. Sum_SeqCounts = Sum of all RawSeqCounts for each ASVs.

Code
if(!useStoredFiles){
  aDist = vegan::vegdist(x = dframe_allASVs$ASV_composition %>% select(-name) %>%
                           data.frame(),method = 'aitchison')
  coord_plots_all <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aDist))
  saveRDS(coord_plots_all,file = paste0(savingdir,'/','coord_plots_all'))
}

coord_plots_all  = readRDS(file = paste0(savingdir,'/','coord_plots_all'))

dim_reduce_obj_all <- plot_dimReduc_coords(coord_plots_all,weights = df_weights$Sum_RA,clusters = df_weights$Species)

dim_reduce_obj_all$MDS_2d
dim_reduce_obj_all$nMDS_2d
dim_reduce_obj_all$TSNE_2d
dim_reduce_obj_all$umap_2d

Code
if(!useStoredFiles){
  aDist = vegan::vegdist(x = dframe2plus$ASV_composition %>% select(-name) %>% data.frame(),method = 'aitchison')
  coord_plots_2ormore <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aDist))
  saveRDS(coord_plots_2ormore,file = paste0(savingdir,'/','coord_plots_2ormore'))
}

coord_plots_2ormore  = readRDS(file = paste0(savingdir,'/','coord_plots_2ormore'))

dim_reduce_obj_2ormore <- plot_dimReduc_coords(
  coord_plots_2ormore,weights = df_weights2plus$Sum_RA,
  clusters = df_weights2plus$Species)

dim_reduce_obj_2ormore$MDS_2d
dim_reduce_obj_2ormore$nMDS_2d
dim_reduce_obj_2ormore$TSNE_2d
dim_reduce_obj_2ormore$umap_2d

Code
dim(dframe2plus$ASV_composition)
[1] 767 975

We have 767 ASVs and 975 samples

Code
if(!useStoredFiles){
  aDist = vegan::vegdist(x = dframe3plus$ASV_composition %>% select(-name) %>% data.frame(),method = 'aitchison')
  coord_plots_3ormore <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aDist),perplexityTsne = 150)
  saveRDS(coord_plots_3ormore,file = paste0(savingdir,'/','coord_plots_3ormore'))
}

coord_plots_3ormore  = readRDS(file = paste0(savingdir,'/','coord_plots_3ormore'))

dim_reduce_obj_3ormore <- plot_dimReduc_coords(
  coord_plots_3ormore,
  weights = df_weights3plus$Sum_RA,clusters = df_weights3plus$Species)

dim_reduce_obj_3ormore$MDS_2d
dim_reduce_obj_3ormore$nMDS_2d
dim_reduce_obj_3ormore$TSNE_2d
dim_reduce_obj_3ormore$umap_2d

Code
dim(dframe3plus$ASV_composition)
[1] 598 971

598 ASVs and 970 samples.

It feels that removing rare ASVs won’t help..

Code
if(!useStoredFiles){
  brayDist = vegan::vegdist(x = dframe_allASVs$ASV_composition %>% select(-name) %>%
                              data.frame(),method = 'bray')
  coord_plots_all_bray <- distMatrix_dimReduc_coords(distMatrix = as.matrix(brayDist))
  saveRDS(coord_plots_all_bray,file = paste0(savingdir,'/','coord_plots_all_bray'))
}

coord_plots_all_bray  = readRDS(file = paste0(savingdir,'/','coord_plots_all_bray'))

dim_reduce_obj_all_bray <- plot_dimReduc_coords(
  coord_plots_all_bray,weights = df_weights$Sum_RA,
  clusters = df_weights$Species)

dim_reduce_obj_all_bray$MDS_2d
dim_reduce_obj_all_bray$nMDS_2d
dim_reduce_obj_all_bray$TSNE_2d
dim_reduce_obj_all_bray$umap_2d

Code
if(!useStoredFiles){
  brayDist = vegan::vegdist(x = dframe2plus$ASV_composition %>% select(-name) %>% data.frame(),method = 'bray')
  coord_plots_2ormore_bray <- distMatrix_dimReduc_coords(distMatrix = as.matrix(brayDist))
  saveRDS(coord_plots_2ormore_bray,file = paste0(savingdir,'/','coord_plots_2ormore_bray'))
}

coord_plots_2ormore_bray  = readRDS(file = paste0(savingdir,'/','coord_plots_2ormore_bray'))

dim_reduce_obj_2ormore_bray <- plot_dimReduc_coords(
  coord_plots_2ormore_bray,
  weights = df_weights2plus$Sum_RA,
  clusters = df_weights2plus$Species)

dim_reduce_obj_2ormore_bray$MDS_2d
dim_reduce_obj_2ormore_bray$nMDS_2d
dim_reduce_obj_2ormore_bray$TSNE_2d
dim_reduce_obj_2ormore_bray$umap_2d

Code
dim(dframe2plus$ASV_composition)
[1] 767 975

We have 767 ASVs and 975 samples

Code
if(!useStoredFiles){
  brayDist = vegan::vegdist(x = dframe3plus$ASV_composition %>% select(-name) %>% data.frame(),method = 'bray')
  coord_plots_3ormore_bray <- distMatrix_dimReduc_coords(distMatrix = as.matrix(brayDist),perplexityTsne = 150)
  saveRDS(coord_plots_3ormore_bray,file = paste0(savingdir,'/','coord_plots_3ormore_bray'))
}

coord_plots_3ormore_bray  = readRDS(file = paste0(savingdir,'/','coord_plots_3ormore_bray'))

dim_reduce_obj_3ormore_bray <- plot_dimReduc_coords(
  coord_plots_3ormore_bray,
  weights = df_weights3plus$Sum_RA,
  clusters = df_weights3plus$Species)

dim_reduce_obj_3ormore_bray$MDS_2d
dim_reduce_obj_3ormore_bray$nMDS_2d
dim_reduce_obj_3ormore_bray$TSNE_2d
dim_reduce_obj_3ormore_bray$umap_2d

Code
dim(dframe3plus$ASV_composition)
[1] 598 971

598 ASVs and 970 samples.

It feels that removing rare ASVs won’t help..

Code
if(!useStoredFiles){
  hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  coord_plots_all_bin <- distMatrix_dimReduc_coords(distMatrix = as.matrix(hammDistMatrix),perplexityTsne = 150)
  saveRDS(coord_plots_all_bin,file = paste0(savingdir,'/','coord_plots_all_bin'))
}

coord_plots_all_bin  = readRDS(file = paste0(savingdir,'/','coord_plots_all_bin'))

coord_plots_all_bin_plots <- plot_dimReduc_coords(
  coord_plots_all_bin,
  weights = df_weights$Sum_RA,
  clusters =  df_weights$Species)

coord_plots_all_bin_plots$MDS_2d
coord_plots_all_bin_plots$nMDS_2d
coord_plots_all_bin_plots$TSNE_2d
coord_plots_all_bin_plots$umap_2d

Code
if(!useStoredFiles){
  hammDistMatrix = hammingDist(X = dframe2plus$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  coord_plots_2ormore_bin <- distMatrix_dimReduc_coords(distMatrix = as.matrix(hammDistMatrix),perplexityTsne = 150)
  saveRDS(coord_plots_2ormore_bin,file = paste0(savingdir,'/','coord_plots_2ormore_bin'))
}

coord_plots_2ormore_bin  = readRDS(file = paste0(savingdir,'/','coord_plots_2ormore_bin'))

coord_plots_2ormore_bin_plots <- plot_dimReduc_coords(
  coord_plots_2ormore_bin,
  weights = df_weights2plus$Sum_RA,
  clusters = df_weights2plus$Species)

coord_plots_2ormore_bin_plots$MDS_2d
coord_plots_2ormore_bin_plots$nMDS_2d
coord_plots_2ormore_bin_plots$TSNE_2d
coord_plots_2ormore_bin_plots$umap_2d

We have 767 ASVs and 975 samples

Code
if(!useStoredFiles){
  hammDistMatrix = hammingDist(X = dframe3plus$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  coord_plots_3ormore_bin <- distMatrix_dimReduc_coords(distMatrix = as.matrix(hammDistMatrix),perplexityTsne = 150)
  saveRDS(coord_plots_3ormore_bin,file = paste0(savingdir,'/','coord_plots_3ormore_bin'))
  }

coord_plots_3ormore_bin  = readRDS(file = paste0(savingdir,'/','coord_plots_3ormore_bin'))

coord_plots_3ormore_bin_plots <- plot_dimReduc_coords(
  coord_plots_3ormore_bin,
  weights = df_weights3plus$Sum_RA,
  clusters = df_weights3plus$Species)

coord_plots_3ormore_bin_plots$MDS_2d
coord_plots_3ormore_bin_plots$nMDS_2d
coord_plots_3ormore_bin_plots$TSNE_2d
coord_plots_3ormore_bin_plots$umap_2d

598 ASVs and 971 samples.

Can we create a mixture of those tow distances and see if a pattern emerge?

Code
if(!useStoredFiles){
  hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  aitDistMatrix = robCompositions::aDist(x = dframe_allASVs$ASV_composition %>% select(-name) %>% data.frame())
  
  fifty_fifty_matrix = 0.5 * normalizeDistMatrix(hammDistMatrix) + 0.5 * normalizeDistMatrix(aitDistMatrix)
  coord_plots_mixed_All <- distMatrix_dimReduc_coords(distMatrix = as.matrix(hammDistMatrix),perplexityTsne = 150)
  saveRDS(coord_plots_mixed_All,file = paste0(savingdir,'/','coord_plots_mixed_All'))
}

coord_plots_mixed_All  = readRDS(paste0(savingdir,'/','coord_plots_mixed_All'))

coord_plots_mixed_All_plots <- plot_dimReduc_coords(coord_plots_mixed_All,weights = df_weights$Sum_RA)

coord_plots_mixed_All_plots$MDS_2d
coord_plots_mixed_All_plots$nMDS_2d
coord_plots_mixed_All_plots$TSNE_2d
coord_plots_mixed_All_plots$umap_2d

Haven’t compared it yet, but could be and Idea…

Here the idea is to aggregate some samples. So we would have compositions of ASVs but instead of samples we would have a less sparse and with less dimensions. The most natural could be:

  1. Compositions of ASVs, under longhurst provinces combined with different depths
  2. Chunks of latitudes, and chunks of depths.

So for each aggregation we should take a look at mds, tsne, umap of the ait distances (or distance matrix using the CLR transformation)

So first lets do a bit of EDA to see what we can expect.

Code
dframe_allASVs$dframe %>% select(SampleKey,Depth,Longhurst_Short) %>%  distinct() %>% 
  ggplot(aes(Depth))+geom_histogram()
dframe_allASVs$dframe %>% select(SampleKey,Depth,Longhurst_Short) %>%  distinct() %>% 
  ggplot(aes(Depth))+geom_boxplot()

Let’s slice the dephts of the ocean in 10%.

Code
distinct_depth <- dframe_allASVs$dframe %>% select(SampleKey,Depth) %>% distinct()
qtls_0.1 = quantile(distinct_depth$Depth, seq(0,1,0.1))
qtlsNames = 1:(length(qtls_0.1)-1)
qtlsNames = ifelse(qtlsNames<10,paste0('DGR0',qtlsNames),paste0('DGR',qtlsNames))
qtls_0.1
        0%        10%        20%        30%        40%        50%        60% 
   0.00000    1.18800   15.00000   28.80741   45.00000   61.50000   81.29718 
       70%        80%        90%       100% 
 105.47600  150.64307  335.50200 2568.80000 

Here are the depth quantiles.

Code
distinct_depth = distinct_depth %>% 
  mutate(CatDepth=cut(Depth,breaks = qtls_0.1,include.lowest = T,right = T,labels = qtlsNames))

distinct_depth %>% group_by(CatDepth) %>% 
  summarise(Freq=n())
# A tibble: 10 × 2
   CatDepth  Freq
   <fct>    <int>
 1 DGR01       99
 2 DGR02      115
 3 DGR03       82
 4 DGR04      112
 5 DGR05       86
 6 DGR06       98
 7 DGR07       99
 8 DGR08       98
 9 DGR09       99
10 DGR10       99
Code
dframe_allASVs$dframe %>% select(SampleKey,Depth,Longhurst_Short) %>% distinct() %>% 
  mutate(CatDepth=cut(Depth,breaks = qtls_0.1,include.lowest = T,right = T,labels = qtlsNames)) %>% 
  ggplot(aes(CatDepth))+geom_bar()+
  facet_wrap(~Longhurst_Short,scales='free_y')

This is the distribution of number of samples for each GDR (group depth rank for each longhurst province.

So our first tentative will be to create compositions of asvs with this amount of combinations (GDRxLH)

Code
Lat_Depth_Composition_df=readRDS(file = paste0(savingdir,'/','Lat_Depth_Composition_df'))

if(!useStoredFiles){
  #hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  aitDistMatrix = robCompositions::aDist(x = Lat_Depth_Composition_df %>% select(-name) %>% data.frame())
  Lat_Depth_ait_all_coord <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aitDistMatrix),perplexityTsne = 150)
  saveRDS(Lat_Depth_ait_all_coord,file = paste0(savingdir,'/','Lat_Depth_ait_all_coord'))
}

Lat_Depth_ait_all_coord  = readRDS(file = paste0(savingdir,'/','Lat_Depth_ait_all_coord'))

Lat_Depth_ait_all_coord_plots <- plot_dimReduc_coords(Lat_Depth_ait_all_coord)

Lat_Depth_ait_all_coord_plots$MDS_2d

Code
Lat_Depth_ait_all_coord_plots$nMDS_2d

Code
Lat_Depth_ait_all_coord_plots$TSNE_2d

Code
Lat_Depth_ait_all_coord_plots$umap_2d

Code
LH_Depth_Composition_df=readRDS(file = paste0(savingdir,'/','LH_Depth_Composition_df'))

if(!useStoredFiles){
  #hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  aitDistMatrix = robCompositions::aDist(x = LH_Depth_Composition_df %>% select(-name) %>% data.frame())
  LH_Depth_ait_all_coord <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aitDistMatrix),perplexityTsne = 150)
  saveRDS(LH_Depth_ait_all_coord,file = paste0(savingdir,'/','LH_Depth_ait_all_coord'))
}

LH_Depth_ait_all_coord  = readRDS(file = paste0(savingdir,'/','LH_Depth_ait_all_coord'))

LH_Depth_ait_all_coord_plots <- plot_dimReduc_coords(LH_Depth_ait_all_coord)

LH_Depth_ait_all_coord_plots$MDS_2d
LH_Depth_ait_all_coord_plots$nMDS_2d
LH_Depth_ait_all_coord_plots$TSNE_2d
LH_Depth_ait_all_coord_plots$umap_2d

Code
## Varying Alpha
## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
running = F
if(running){
  hammingDistMatrix = hammingDist(dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  inducedDist_ = inducedDist(
    dFrame = dframe_allASVs$dframe,
    c1 = 1,c2=1000,c3=10,c4=10,
    compMatrix = dframe_allASVs$ASV_composition %>% data.frame())
  
  myDist = inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0 <- clusterize_ward_pam(
    distMatrix = hammingDistMatrix,
    maxnclust = 50,dFrameAsv = dframe_allASVs$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0,file = paste0(savingdir,'/','hamming_clusters_alpha0'))
  
  
  ## Varying Alpha
  ## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.1
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.1 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe_allASVs$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.1,file = paste0(savingdir,'/','hamming_clusters_alpha0.1'))
  
  ## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.25
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.25 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe_allASVs$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.25,file = paste0(savingdir,'/','hamming_clusters_alpha0.25'))
  
  
  ## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.35
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.35 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe_allASVs$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.35,file = paste0(savingdir,'/','hamming_clusters_alpha0.35'))
}
Code
running = F
if(running){
 inducedDist_ = inducedDist(
    dFrame = dframe2plus$dframe,
    c1 = 1,c2=1000,c3=10,c4=10,
    compMatrix = dframe2plus$ASV_composition %>% data.frame())
  hammingDistMatrix = hammingDist(dframe2plus$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  ## Varying Alpha
  ## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
  
  hamming_clusters_alpha0 <- clusterize_ward_pam(
    distMatrix = hammingDistMatrix,
    maxnclust = 50,dFrameAsv = dframe2plus$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0,file = paste0(savingdir,'/','hamming_clusters_2plus_alpha0'))
  
  ## Varying Alpha
  ## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.1
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.1 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe2plus$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.1,file = paste0(savingdir,'/','hamming_clusters_2plus_alpha0.1'))
  
  
  ## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.25
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.25 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe2plus$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.25,file = paste0(savingdir,'/','hamming_clusters_2plus_alpha0.25'))
  ## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.35
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.35 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe2plus$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.35,file = paste0(savingdir,'/','hamming_clusters_2plus_alpha0.35'))
}
Code
running = F
if(running){
  inducedDist_ = inducedDist(
    dFrame = dframe3plus$dframe,
    c1 = 1,c2=1000,c3=10,c4=10,
    compMatrix = dframe3plus$ASV_composition %>% data.frame())
  hammingDistMatrix = hammingDist(dframe3plus$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  ## Varying Alpha
  ## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
  
  hamming_clusters_alpha0 <- clusterize_ward_pam(
    distMatrix = hammingDistMatrix,
    maxnclust = 50,dFrameAsv = dframe3plus$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0,file = paste0(savingdir,'/','hamming_clusters_3plus_alpha0'))
  
  ## Varying Alpha
  ## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.1
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  #running = F
  
  hamming_clusters_alpha0.1 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe3plus$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.1,file = paste0(savingdir,'/','hamming_clusters_3plus_alpha0.1'))
  
  
  ## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.25
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  #running = F
  
  hamming_clusters_alpha0.25 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe3plus$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.25,file = paste0(savingdir,'/','hamming_clusters_3plus_alpha0.25'))
  ## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.35
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  #running = F
  hamming_clusters_alpha0.35 <- clusterize_ward_pam(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe3plus$binnary_ASV_df$ASV_name)
  saveRDS(hamming_clusters_alpha0.35,file = paste0(savingdir,'/','hamming_clusters_3plus_alpha0.35'))
}
Code
## Varying Alpha
## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
running = F
if(running){
  hammingDistMatrix = hammingDist(dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  inducedDist_ = inducedDist(
    dFrame = dframe_allASVs$dframe,
    c1 = 1,c2=1000,c3=10,c4=10,
    compMatrix = dframe_allASVs$ASV_composition %>% data.frame())
  
  hamming_clusters_alpha0 <- clusterize_ward_pam_weighted(
    distMatrix = hammingDistMatrix,
    maxnclust = 50,dFrameAsv = dframe_allASVs$binnary_ASV_df$ASV_name,weights_vec = df_weights$Sum_RA)
  
  saveRDS(hamming_clusters_alpha0,file = paste0(savingdir,'/','hamming_weighted_clusters_alpha0'))
  
  ## Varying Alpha
  ## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.1
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.1 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe_allASVs$binnary_ASV_df$ASV_name,weights_vec = df_weights$Sum_RA)
  saveRDS(hamming_clusters_alpha0.1,file = paste0(savingdir,'/','hamming_weighted_clusters_alpha0.1'))
  
  ## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.25
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.25 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe_allASVs$binnary_ASV_df$ASV_name,weights_vec = df_weights$Sum_RA)
  saveRDS(hamming_clusters_alpha0.25,file = paste0(savingdir,'/','hamming_weighted_clusters_alpha0.25'))
  
  
  ## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.35
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.35 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe_allASVs$binnary_ASV_df$ASV_name,weights_vec = df_weights$Sum_RA)
  saveRDS(hamming_clusters_alpha0.35,file = paste0(savingdir,'/','hamming_weighted_clusters_alpha0.35'))
}
Code
running = F
if(running){
  inducedDist_ = inducedDist(
    dFrame = dframe2plus$dframe,
    c1 = 1,c2=1000,c3=10,c4=10,
    compMatrix = dframe2plus$ASV_composition %>% data.frame())
  hammingDistMatrix = hammingDist(dframe2plus$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  ## Varying Alpha
  ## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
  
  hamming_clusters_alpha0 <- clusterize_ward_pam_weighted(
    distMatrix = hammingDistMatrix,
    maxnclust = 50,dFrameAsv = dframe2plus$binnary_ASV_df$ASV_name,weights_vec = df_weights2plus$Sum_RA)
  saveRDS(hamming_clusters_alpha0,file = paste0(savingdir,'/','hamming_weighted_clusters_2plus_alpha0'))
  
  ## Varying Alpha
  ## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.1
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.1 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe2plus$binnary_ASV_df$ASV_name,weights_vec = df_weights2plus$Sum_RA)
  saveRDS(hamming_clusters_alpha0.1,file = paste0(savingdir,'/','hamming_weighted_clusters_2plus_alpha0.1'))
  
  
  ## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.25
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.25 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe2plus$binnary_ASV_df$ASV_name,weights_vec = df_weights2plus$Sum_RA)
  saveRDS(hamming_clusters_alpha0.25,file = paste0(savingdir,'/','hamming_weighted_clusters_2plus_alpha0.25'))
  ## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.35
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  
  hamming_clusters_alpha0.35 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe2plus$binnary_ASV_df$ASV_name,weights_vec = df_weights2plus$Sum_RA)
  saveRDS(hamming_clusters_alpha0.35,file = paste0(savingdir,'/','hamming_weighted_clusters_2plus_alpha0.35'))
  
}
Code
running = F
if(running){
  
  inducedDist_ = inducedDist(
    dFrame = dframe3plus$dframe,
    c1 = 1,c2=1000,c3=10,c4=10,
    compMatrix = dframe3plus$ASV_composition %>% data.frame())
  hammingDistMatrix = hammingDist(dframe3plus$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  ## Varying Alpha
  ## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
  
  hamming_clusters_alpha0 <- clusterize_ward_pam_weighted(
    distMatrix = hammingDistMatrix,
    maxnclust = 50,dFrameAsv = dframe3plus$binnary_ASV_df$ASV_name,weights_vec = df_weights3plus$Sum_RA)
  saveRDS(hamming_clusters_alpha0,file = paste0(savingdir,'/','hamming_weighted_clusters_3plus_alpha0'))
  
  ## Varying Alpha
  ## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.1
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  #running = F
  
  hamming_clusters_alpha0.1 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe3plus$binnary_ASV_df$ASV_name,weights_vec = df_weights3plus$Sum_RA)
  saveRDS(hamming_clusters_alpha0.1,file = paste0(savingdir,'/','hamming_weighted_clusters_3plus_alpha0.1'))
  
  
  ## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.25
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  #running = F
  
  hamming_clusters_alpha0.25 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe3plus$binnary_ASV_df$ASV_name,weights_vec = df_weights3plus$Sum_RA)
  saveRDS(hamming_clusters_alpha0.25,file = paste0(savingdir,'/','hamming_weighted_clusters_3plus_alpha0.25'))
  ## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
  alpha_ = 0.35
  mydist = (1-alpha_) * hammingDistMatrix + alpha_*inducedDist_$normalizedMyDist
  #running = F
  hamming_clusters_alpha0.35 <- clusterize_ward_pam_weighted(
    distMatrix = mydist,
    maxnclust = 50,dFrameAsv = dframe3plus$binnary_ASV_df$ASV_name,weights_vec = df_weights3plus$Sum_RA)
  saveRDS(hamming_clusters_alpha0.35,file = paste0(savingdir,'/','hamming_weighted_clusters_3plus_alpha0.35'))
  
}
Code
running = F
if(running){
  hamming_clusters_alpha0 = readRDS(file = paste0(savingdir,'/','hamming_clusters_alpha0'))
  hamming_clusters_alpha0.1 = readRDS(file = paste0(savingdir,'/','hamming_clusters_alpha0.1'))
  hamming_clusters_alpha0.25 = readRDS(file = paste0(savingdir,'/','hamming_clusters_alpha0.25'))
  hamming_clusters_alpha0.35 = readRDS(file = paste0(savingdir,'/','hamming_clusters_alpha0.35'))
  
  hammingDistMatrix = hammingDist(dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  
  #### ------ alpha =0 
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_alpha0'))
  rm(list_eval)
  #### ------ alpha = 0.1
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.1$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.1$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_alpha0.1'))
  rm(list_eval)
  #### ------ alpha = 0.25
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.25$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.25$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_alpha0.25'))
  rm(list_eval)
  #### ------ alpha = 0.35
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.35$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.35$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_alpha0.35'))
}
Code
list_eval_alpha0 = readRDS(paste0(savingdir,'/','list_eval_alpha0'))
list_eval_alpha0.1 = readRDS(paste0(savingdir,'/','list_eval_alpha0.1'))
list_eval_alpha0.25 = readRDS(paste0(savingdir,'/','list_eval_alpha0.25'))
list_eval_alpha0.35 = readRDS(paste0(savingdir,'/','list_eval_alpha0.35'))

df_summary_eval = bind_rows(
  summarise_eval_clusters_distMatrix(list_eval_alpha0) %>% mutate(alpha=0),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.1) %>% mutate(alpha=0.1),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.25) %>% mutate(alpha=0.25),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.35) %>% mutate(alpha=0.35))

df_summary_eval %>% 
  select(nclust,method,alpha,
         within_between_avg,numer_within_between_avg,denom_within_between_avg) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  filter(nclust>2) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
df_summary_eval %>% 
  select(nclust,method,alpha,everything()) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
running = F
if(running){
  
  hamming_clusters_alpha0 = readRDS(file = paste0(savingdir,'/','hamming_clusters_2plus_alpha0'))
  hamming_clusters_alpha0.1 = readRDS(file = paste0(savingdir,'/','hamming_clusters_2plus_alpha0.1'))
  hamming_clusters_alpha0.25 = readRDS(file = paste0(savingdir,'/','hamming_clusters_2plus_alpha0.25'))
  hamming_clusters_alpha0.35 = readRDS(file = paste0(savingdir,'/','hamming_clusters_2plus_alpha0.35'))
  
  hammingDistMatrix = hammingDist(dframe2plus$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  #### ------ alpha =0 
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_2more_alpha0'))
  rm(list_eval)
  #### ------ alpha = 0.1 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.1$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.1$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_2more_alpha0.1'))
  rm(list_eval)
  
  #### ------ alpha = 0.25 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.25$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.25$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_2more_alpha0.25'))
  rm(list_eval)
  
  #### ------ alpha = 0.35 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.35$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.35$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_2more_alpha0.35'))
}
Code
list_eval_alpha0 = readRDS(paste0(savingdir,'/','list_eval_2more_alpha0'))
list_eval_alpha0.1 = readRDS(paste0(savingdir,'/','list_eval_2more_alpha0.1'))
list_eval_alpha0.25 = readRDS(paste0(savingdir,'/','list_eval_2more_alpha0.25'))
list_eval_alpha0.35 = readRDS(paste0(savingdir,'/','list_eval_2more_alpha0.35'))

df_summary_eval = bind_rows(
  summarise_eval_clusters_distMatrix(list_eval_alpha0) %>% mutate(alpha=0),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.1) %>% mutate(alpha=0.1),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.25) %>% mutate(alpha=0.25),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.35) %>% mutate(alpha=0.35))

df_summary_eval %>% 
  select(nclust,method,alpha,
         within_between_avg,numer_within_between_avg,denom_within_between_avg) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  filter(nclust>2) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
df_summary_eval %>% 
  select(nclust,method,alpha,everything()) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
running = F
if(running){
  
  hamming_clusters_alpha0 = readRDS(file = paste0(savingdir,'/','hamming_clusters_3plus_alpha0'))
  hamming_clusters_alpha0.1 = readRDS(file = paste0(savingdir,'/','hamming_clusters_3plus_alpha0.1'))
  hamming_clusters_alpha0.25 = readRDS(file = paste0(savingdir,'/','hamming_clusters_3plus_alpha0.25'))
  hamming_clusters_alpha0.35 = readRDS(file = paste0(savingdir,'/','hamming_clusters_3plus_alpha0.35'))
  
  hammingDistMatrix = hammingDist(dframe3plus$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  #### ------ alpha =0 
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_3more_alpha0'))
  rm(list_eval)
  #### ------ alpha = 0.1 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.1$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.1$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_3more_alpha0.1'))
  rm(list_eval)
  
  #### ------ alpha = 0.25 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.25$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.25$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_3more_alpha0.25'))
  rm(list_eval)
  
  #### ------ alpha = 0.35 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.35$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.35$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_3more_alpha0.35'))
}
Code
list_eval_alpha0 = readRDS(paste0(savingdir,'/','list_eval_3more_alpha0'))
list_eval_alpha0.1 = readRDS(paste0(savingdir,'/','list_eval_3more_alpha0.1'))
list_eval_alpha0.25 = readRDS(paste0(savingdir,'/','list_eval_3more_alpha0.25'))
list_eval_alpha0.35 = readRDS(paste0(savingdir,'/','list_eval_3more_alpha0.35'))

df_summary_eval = bind_rows(
  summarise_eval_clusters_distMatrix(list_eval_alpha0) %>% mutate(alpha=0),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.1) %>% mutate(alpha=0.1),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.25) %>% mutate(alpha=0.25),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.35) %>% mutate(alpha=0.35))

df_summary_eval %>% 
  select(nclust,method,alpha,
         within_between_avg,numer_within_between_avg,denom_within_between_avg) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  filter(nclust>2) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
df_summary_eval %>% 
  select(nclust,method,alpha,everything()) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
running = F
if(running){
  hamming_clusters_alpha0 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_alpha0'))
  hamming_clusters_alpha0.1 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_alpha0.1'))
  hamming_clusters_alpha0.25 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_alpha0.25'))
  hamming_clusters_alpha0.35 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_alpha0.35'))
  
  hammingDistMatrix = hammingDist(dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  #### ------ alpha =0 
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_alpha0'))
  rm(list_eval)
  #### ------ alpha = 0.1
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.1$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.1$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_alpha0.1'))
  rm(list_eval)
  #### ------ alpha = 0.25
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.25$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.25$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_alpha0.25'))
  rm(list_eval)
  #### ------ alpha = 0.35
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.35$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.35$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_alpha0.35'))
}
Code
list_eval_alpha0 = readRDS(paste0(savingdir,'/','list_eval_weighted_alpha0'))
list_eval_alpha0.1 = readRDS(paste0(savingdir,'/','list_eval_weighted_alpha0.1'))
list_eval_alpha0.25 = readRDS(paste0(savingdir,'/','list_eval_weighted_alpha0.25'))
list_eval_alpha0.35 = readRDS(paste0(savingdir,'/','list_eval_weighted_alpha0.35'))

df_summary_eval = bind_rows(
  summarise_eval_clusters_distMatrix(list_eval_alpha0) %>% mutate(alpha=0),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.1) %>% mutate(alpha=0.1),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.25) %>% mutate(alpha=0.25),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.35) %>% mutate(alpha=0.35))

df_summary_eval %>% 
  select(nclust,method,alpha,
         within_between_avg,numer_within_between_avg,denom_within_between_avg) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  filter(nclust>2) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
df_summary_eval %>% 
  select(nclust,method,alpha,everything()) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
running = F
if(running){
  
  hamming_clusters_alpha0 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_2plus_alpha0'))
  hamming_clusters_alpha0.1 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_2plus_alpha0.1'))
  hamming_clusters_alpha0.25 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_2plus_alpha0.25'))
  hamming_clusters_alpha0.35 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_2plus_alpha0.35'))
  
  hammingDistMatrix = hammingDist(dframe2plus$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  #### ------ alpha =0 
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_2more_alpha0'))
  rm(list_eval)
  #### ------ alpha = 0.1 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.1$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.1$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_2more_alpha0.1'))
  rm(list_eval)
  
  #### ------ alpha = 0.25 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.25$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.25$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_2more_alpha0.25'))
  rm(list_eval)
  
  #### ------ alpha = 0.35 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.35$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.35$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_2more_alpha0.35'))
}
Code
list_eval_alpha0 = readRDS(paste0(savingdir,'/','list_eval_weighted_2more_alpha0'))
list_eval_alpha0.1 = readRDS(paste0(savingdir,'/','list_eval_weighted_2more_alpha0.1'))
list_eval_alpha0.25 = readRDS(paste0(savingdir,'/','list_eval_weighted_2more_alpha0.25'))
list_eval_alpha0.35 = readRDS(paste0(savingdir,'/','list_eval_weighted_2more_alpha0.35'))

df_summary_eval = bind_rows(
  summarise_eval_clusters_distMatrix(list_eval_alpha0) %>% mutate(alpha=0),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.1) %>% mutate(alpha=0.1),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.25) %>% mutate(alpha=0.25),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.35) %>% mutate(alpha=0.35))

df_summary_eval %>% 
  select(nclust,method,alpha,
         within_between_avg,numer_within_between_avg,denom_within_between_avg) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  filter(nclust>2) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
df_summary_eval %>% 
  select(nclust,method,alpha,everything()) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
running = F
if(running){
  
  hamming_clusters_alpha0 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_3plus_alpha0'))
  hamming_clusters_alpha0.1 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_3plus_alpha0.1'))
  hamming_clusters_alpha0.25 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_3plus_alpha0.25'))
  hamming_clusters_alpha0.35 = readRDS(file = paste0(savingdir,'/','hamming_weighted_clusters_3plus_alpha0.35'))
  
  hammingDistMatrix = hammingDist(dframe3plus$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
  hammingDistMatrix = normalizeDistMatrix(hammingDistMatrix)
  
  #### ------ alpha =0 
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_3more_alpha0'))
  rm(list_eval)
  #### ------ alpha = 0.1 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.1$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.1$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.1$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_3more_alpha0.1'))
  rm(list_eval)
  
  #### ------ alpha = 0.25 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.25$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.25$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.25$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_3more_alpha0.25'))
  rm(list_eval)
  
  #### ------ alpha = 0.35 -----------------------------------------------------------------
  list_eval <- list()
  list_eval$hclust<- list()
  list_eval$pam <- list()
  
  numberOfClusters = ncol(hamming_clusters_alpha0.35$dFrameAsv_hclust)-1
  cols_to_evaluate = colnames(hamming_clusters_alpha0.35$dFrameAsv_hclust)
  cols_to_evaluate = cols_to_evaluate[-1]
  
  for(i in 2: length(cols_to_evaluate)){
    list_eval$hclust[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_hclust %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
    list_eval$pam[[i-1]] = eval_clusters_distMatrix(
      distMatrix = hammingDistMatrix,
      df_ASV_Clusters = hamming_clusters_alpha0.35$dFrameAsv_pam %>% select(ASV=ASV,any_of(cols_to_evaluate[i]))
    )
  }
  saveRDS(list_eval,paste0(savingdir,'/','list_eval_weighted_3more_alpha0.35'))
}
Code
list_eval_alpha0 = readRDS(paste0(savingdir,'/','list_eval_weighted_3more_alpha0'))
list_eval_alpha0.1 = readRDS(paste0(savingdir,'/','list_eval_weighted_3more_alpha0.1'))
list_eval_alpha0.25 = readRDS(paste0(savingdir,'/','list_eval_weighted_3more_alpha0.25'))
list_eval_alpha0.35 = readRDS(paste0(savingdir,'/','list_eval_weighted_3more_alpha0.35'))

df_summary_eval = bind_rows(
  summarise_eval_clusters_distMatrix(list_eval_alpha0) %>% mutate(alpha=0),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.1) %>% mutate(alpha=0.1),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.25) %>% mutate(alpha=0.25),
  summarise_eval_clusters_distMatrix(list_eval_alpha0.35) %>% mutate(alpha=0.35))

df_summary_eval %>% 
  select(nclust,method,alpha,
         within_between_avg,numer_within_between_avg,denom_within_between_avg) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  filter(nclust>2) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')

Code
df_summary_eval %>% 
  select(nclust,method,alpha,everything()) %>% 
  pivot_longer(cols = -c(1,2,3)) %>% mutate(alpha = factor(alpha)) %>% 
  ggplot(aes(x=nclust,y=value,color=method,linetype=alpha))+
  geom_line()+
  facet_wrap(~name,scales = 'free')+
  theme_minimal()+theme(legend.position = 'bottom')